Task1

library(data.table)
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)
library(ggfortify)
library(scatterplot3d)

# Load the match table and the bookmaker odds table from local RDS files.
# NOTE(review): absolute, machine-specific paths - adjust before running elsewhere.
matches <- readRDS(
  "C:\\Users\\a-adsene\\Desktop\\ETM58D\\df9b1196-e3cf-4cc7-9159-f236fe738215_matches.rds"
)

odds <- readRDS(
  "C:\\Users\\a-adsene\\Desktop\\ETM58D\\df9b1196-e3cf-4cc7-9159-f236fe738215_odd_details.rds"
)

# Remove the columns that the rest of this task does not use (single by-reference step).
matches[, c("type", "leagueId", "home", "away", "date") := NULL]

# Split the "home:away" score string into two columns and convert them to numbers.
matches[, c("score_home", "score_away") := tstrsplit(score, ":")]
matches[, score_home := as.numeric(score_home)]
## Warning in eval(jsub, SDenv, parent.frame()): NAs introduced by coercion
matches[, score_away := as.numeric(score_away)]
matches[, total_score := score_home + score_away]

# Keep only rows without any missing value (e.g. scores that failed to parse).
matches <- matches[complete.cases(matches)]

# Label the match result; `:=` updates `matches` in place, so no reassignment is needed.
matches[score_home > score_away, Result := "Home"]
matches[score_home < score_away, Result := "Away"]
matches[score_home == score_away, Result := "Draw"]

# The individual scores are no longer needed once the result and total are stored.
matches[, c("score_home", "score_away") := NULL]

# Over/under label relative to the 2.5 goal line.
matches[, ou := ifelse(total_score >= 2.5, "over", "under")]

Order the odds by date and keep the latest odd for each match, bookmaker, bet type and odd type

# Sort in place so that, within each match/bookmaker/bet type/odd type, rows are in date order.
setorder(odds, matchId, bookmaker, betType, oddtype, date)
# Keep the last row of every group, i.e. the row with the latest date after sorting.
odds2 <- odds[, .SD[.N], by = .(matchId, bookmaker, betType, oddtype)]
# Reshape to one row per match and one column per betType_oddtype_bookmaker combination.
odds3 <- dcast(
  odds2,
  matchId ~ betType + oddtype + bookmaker,
  value.var = "odd"
)

# One-row table holding, for every column of odds3, the number of non-missing entries.
odds4 <- odds3[, lapply(.SD, function(col_values) sum(!is.na(col_values)))]

We counted the non-missing values in every column and kept the 70 columns with the most observations, so that as many matches as possible remain after dropping incomplete rows.

# Rank the columns of odds3 by their number of non-missing values (from odds4)
# and keep the names of the 70 best-covered ones. head() is used instead of
# `[1:70]` so that a table with fewer than 70 columns does not yield NA names.
columnstokeep <- head(names(sort(unlist(odds4), decreasing = TRUE)), 70L)

# Select the chosen columns by name (TRUE/FALSE spelled out; T/F can be reassigned).
odds5 <- odds3[, columnstokeep, with = FALSE]

# Keep only the matches that have a value in every selected column.
odds6 <- odds5[complete.cases(odds5)]


# Attach the over/under label of each match to its odds (join on matchId).
# NOTE(review): the printed result below lists matchId 04zko0D5 twice - confirm
# whether duplicated matches in the inputs should be removed before merging.
odds7 <- merge(odds6, matches[, .(matchId, ou)], by = "matchId")
odds7
##        matchId 1x2_odd1_10Bet 1x2_odd1_12BET 1x2_odd1_188BET
##    1: 02oVDuv1           1.80           1.83            1.83
##    2: 04PCiQzK           1.46           1.47            1.50
##    3: 04vrPwsg           2.00           2.02            2.07
##    4: 04zko0D5           1.13           1.13            1.14
##    5: 04zko0D5           1.13           1.13            1.14
##   ---                                                       
## 2721: zwD6OZpK           1.61           1.62            1.60
## 2722: zwD77hZh           1.77           1.87            1.86
## 2723: zwYQNFfa           2.08           2.17            2.17
## 2724: zy3x0uCs           1.21           1.22            1.23
## 2725: zylytbNg           2.15           2.20            2.19
##       1x2_odd1_Betclic 1x2_odd1_Betsafe 1x2_odd1_Betsson 1x2_odd1_SBOBET
##    1:             1.80             1.85             1.85            1.86
##    2:             1.50             1.47             1.53            1.47
##    3:             2.00             2.01             2.01            2.04
##    4:             1.14             1.15             1.15            1.14
##    5:             1.14             1.15             1.15            1.14
##   ---                                                                   
## 2721:             1.60             1.66             1.68            1.63
## 2722:             1.80             1.79             1.85            1.85
## 2723:             2.10             2.15             2.10            2.16
## 2724:             1.22             1.20             1.20            1.22
## 2725:             2.10             2.20             2.20            2.21
##       1x2_odd1_bet-at-home 1x2_odd1_bet365 1x2_odd2_10Bet 1x2_odd2_12BET
##    1:                 1.78            1.85           4.85           4.70
##    2:                 1.50            1.50           7.63           7.66
##    3:                 1.96            2.05           4.05           4.00
##    4:                 1.13            1.14          24.00          17.00
##    5:                 1.13            1.14          24.00          17.00
##   ---                                                                   
## 2721:                 1.60            1.65           5.45           5.40
## 2722:                 1.80            1.85           4.65           4.22
## 2723:                 2.00            2.10           3.78           3.64
## 2724:                 1.20            1.22          15.96          13.97
## 2725:                 2.10            2.14           3.40           3.25
##       1x2_odd2_188BET 1x2_odd2_Betclic 1x2_odd2_Betsafe 1x2_odd2_Betsson
##    1:            4.55             4.50             4.75             4.75
##    2:            8.10             6.50             7.60             6.65
##    3:            4.00             3.90             4.20             4.20
##    4:           21.00            19.50            20.00            20.00
##    5:           21.00            19.50            20.00            20.00
##   ---                                                                   
## 2721:            5.40             5.25             5.10             5.15
## 2722:            4.40             4.25             4.50             4.65
## 2723:            3.70             3.50             3.55             3.50
## 2724:           15.00            12.00            15.00            15.50
## 2725:            3.40             3.25             3.45             3.45
##       1x2_odd2_SBOBET 1x2_odd2_bet-at-home 1x2_odd2_bet365 1x2_oddX_10Bet
##    1:             4.7                 4.58            4.75           3.55
##    2:             7.8                 6.80            7.00           4.04
##    3:             3.8                 4.02            4.10           3.30
##    4:            20.0                20.04           23.00           8.75
##    5:            20.0                20.04           23.00           8.75
##   ---                                                                    
## 2721:             5.2                 5.19            5.50           4.20
## 2722:             4.4                 4.30            4.59           3.65
## 2723:             3.6                 3.55            3.60           3.23
## 2724:            16.5                14.50           15.00           6.22
## 2725:             3.3                 3.30            3.50           3.55
##       1x2_oddX_12BET 1x2_oddX_188BET 1x2_oddX_Betclic 1x2_oddX_Betsafe
##    1:           3.45            3.65             3.40             3.70
##    2:           4.18            4.15             3.90             4.05
##    3:           3.28            3.25             3.25             3.25
##    4:           8.40            8.70             8.00             9.50
##    5:           8.40            8.70             8.00             9.50
##   ---                                                                 
## 2721:           4.05            4.30             4.00             4.25
## 2722:           3.56            3.60             3.50             3.60
## 2723:           3.19            3.30             3.20             3.20
## 2724:           6.30            6.50             5.75             6.25
## 2725:           3.50            3.45             3.50             3.60
##       1x2_oddX_Betsson 1x2_oddX_SBOBET 1x2_oddX_bet-at-home
##    1:             3.70            3.35                 3.58
##    2:             3.85            4.09                 3.70
##    3:             3.25            3.40                 3.23
##    4:             9.50            8.25                 7.96
##    5:             9.50            8.25                 7.96
##   ---                                                      
## 2721:             4.25            4.10                 4.10
## 2722:             3.75            3.45                 3.50
## 2723:             3.30            3.20                 3.30
## 2724:             6.00            5.80                 5.50
## 2725:             3.60            3.40                 3.55
##       1x2_oddX_bet365 1x2_odd1_BetVictor 1x2_odd1_Pinnacle 1x2_odd1_Unibet
##    1:            3.60               1.83              1.83            1.85
##    2:            4.00               1.50              1.47            1.53
##    3:            3.40               1.95              2.06            2.00
##    4:            9.00               1.14              1.14            1.16
##    5:            9.00               1.14              1.14            1.16
##   ---                                                                     
## 2721:            4.20               1.65              1.64            1.64
## 2722:            3.75               1.85              1.88            1.80
## 2723:            3.30               2.15              2.18            2.15
## 2724:            6.00               1.22              1.23            1.22
## 2725:            3.60               2.20              2.21            2.19
##       1x2_odd2_BetVictor 1x2_odd2_Pinnacle 1x2_odd2_Unibet
##    1:               4.80              5.00            4.80
##    2:               7.50              8.50            6.75
##    3:               4.20              4.18            4.15
##    4:              26.00             23.00           23.00
##    5:              26.00             23.00           23.00
##   ---                                                     
## 2721:               4.50              5.53            5.70
## 2722:               4.59              4.58            4.50
## 2723:               3.80              3.92            3.30
## 2724:              19.00             18.30           14.00
## 2725:               3.30              3.45            3.42
##       1x2_oddX_BetVictor 1x2_oddX_Pinnacle 1x2_oddX_Unibet
##    1:               3.70              3.63            3.65
##    2:               4.20              4.40            3.70
##    3:               3.40              3.38            3.35
##    4:               9.00              8.85            8.50
##    5:               9.00              8.85            8.50
##   ---                                                     
## 2721:               4.33              4.37            4.20
## 2722:               3.75              3.73            3.60
## 2723:               3.25              3.26            3.25
## 2724:               6.50              6.70            5.50
## 2725:               3.70              3.59            3.62
##       1x2_odd1_Interwetten 1x2_odd1_William Hill 1x2_odd1_bwin
##    1:                 1.90                  1.83          1.83
##    2:                 1.60                  1.50          1.48
##    3:                 2.00                  1.95          2.00
##    4:                 1.17                  1.14          1.16
##    5:                 1.17                  1.14          1.16
##   ---                                                         
## 2721:                 1.70                  1.65          1.62
## 2722:                 1.85                  1.85          1.83
## 2723:                 2.10                  2.04          2.00
## 2724:                 1.25                  1.22          1.20
## 2725:                 2.20                  2.15          2.10
##       1x2_odd2_Interwetten 1x2_odd2_William Hill 1x2_odd2_bwin
##    1:                  4.1                  4.80          4.60
##    2:                  5.0                  7.50          7.00
##    3:                  3.6                  4.33          4.10
##    4:                 15.0                 19.00         18.50
##    5:                 15.0                 19.00         18.50
##   ---                                                         
## 2721:                  5.0                  5.00          5.00
## 2722:                  4.0                  4.33          4.20
## 2723:                  3.2                  3.75          3.65
## 2724:                 10.0                 15.00         15.00
## 2725:                  3.4                  3.40          3.50
##       1x2_oddX_Interwetten 1x2_oddX_William Hill 1x2_oddX_bwin
##    1:                 3.50                  3.50          3.50
##    2:                 3.60                  3.75          4.00
##    3:                 3.30                  3.10          3.30
##    4:                 7.50                  7.50          7.25
##    5:                 7.50                  7.50          7.25
##   ---                                                         
## 2721:                 3.70                  4.00          4.33
## 2722:                 3.45                  3.50          3.50
## 2723:                 3.20                  3.25          3.25
## 2724:                 5.00                  5.50          5.50
## 2725:                 3.30                  3.50          3.50
##       1x2_odd1_Betway 1x2_odd2_Betway 1x2_oddX_Betway 1x2_odd1_888sport
##    1:            1.83            4.75            3.60              1.82
##    2:            1.50            6.75            3.85              1.57
##    3:            2.05            4.00            3.25              2.00
##    4:            1.15           23.00            8.50              1.15
##    5:            1.15           23.00            8.50              1.15
##   ---                                                                  
## 2721:            1.67            5.25            4.20              1.61
## 2722:            1.83            4.33            3.50              1.83
## 2723:            2.10            3.45            3.25              2.00
## 2724:            1.20           15.00            5.75              1.22
## 2725:            2.15            3.50            3.50              2.16
##       1x2_odd2_888sport 1x2_oddX_888sport 1x2_odd1_Sportingbet
##    1:              4.70              3.60                 1.85
##    2:              6.50              4.00                 1.50
##    3:              4.15              3.35                 2.00
##    4:             19.00              8.50                 1.15
##    5:             19.00              8.50                 1.15
##   ---                                                         
## 2721:              5.60              4.10                 1.57
## 2722:              4.33              3.60                 1.75
## 2723:              3.75              3.20                 2.00
## 2724:             15.00              5.50                 1.20
## 2725:              3.35              3.55                 2.03
##       1x2_odd2_Sportingbet 1x2_oddX_Sportingbet 1x2_odd1_Paddy Power
##    1:                 4.75                 3.50                 1.80
##    2:                 6.50                 3.60                 1.57
##    3:                 4.00                 3.30                 2.00
##    4:                15.00                 7.00                 1.12
##    5:                15.00                 7.00                 1.12
##   ---                                                               
## 2721:                 5.20                 4.20                 1.62
## 2722:                 4.33                 3.75                 1.85
## 2723:                 3.40                 3.25                 2.15
## 2724:                11.00                 5.75                 1.22
## 2725:                 3.30                 3.70                 2.15
##       1x2_odd2_Paddy Power 1x2_oddX_Paddy Power 1x2_odd1_youwin
##    1:                  4.5                 3.70            1.78
##    2:                  6.5                 3.75            1.50
##    3:                  4.0                 3.30            1.97
##    4:                 23.0                 8.50            1.11
##    5:                 23.0                 8.50            1.11
##   ---                                                          
## 2721:                  5.0                 4.33            1.57
## 2722:                  4.2                 3.60            1.83
## 2723:                  3.5                 3.25            2.20
## 2724:                 13.0                 6.00            1.24
## 2725:                  3.4                 3.50            1.95
##       1x2_odd2_youwin 1x2_oddX_youwin ha_1_bet365 ha_2_bet365 ah_1_10Bet
##    1:            4.50            3.40        1.30        3.39       2.09
##    2:            7.25            4.00        1.13        5.50       1.49
##    3:            4.07            3.26        1.44        2.63       1.32
##    4:           14.00            6.75        1.02       19.00       2.15
##    5:           14.00            6.75        1.02       19.00       2.15
##   ---                                                                   
## 2721:            5.20            4.20        1.25        3.75       1.78
## 2722:            4.50            3.50        1.33        3.25       1.88
## 2723:            4.00            3.10        1.50        2.50       1.49
## 2724:           18.50            6.10        1.05       11.00       1.62
## 2725:            3.20            3.60        1.53        2.37       1.59
##       ah_1_12BET ah_1_188BET ah_1_Pinnacle ah_1_SBOBET ah_1_bet365
##    1:       1.84        1.83          1.32        1.86        1.24
##    2:       2.12        1.82          1.81        2.11        2.25
##    3:       2.04        2.44          1.76        2.05        1.25
##    4:       1.70        1.68          2.22        1.94        1.93
##    5:       1.70        1.68          2.22        1.94        1.93
##   ---                                                             
## 2721:       2.11        1.61          1.43        1.79        1.77
## 2722:       2.20        1.89          1.64        1.86        1.30
## 2723:       1.85        1.84          1.86        1.83        1.20
## 2724:       1.89        1.69          1.88        1.89        1.65
## 2725:       1.64        1.91          1.91        1.93        1.57
##       ah_2_10Bet    ou
##    1:       1.78 under
##    2:       5.75  over
##    3:       3.14 under
##    4:       2.28  over
##    5:       2.28  over
##   ---                 
## 2721:       2.09 under
## 2722:       2.00  over
## 2723:       2.54  over
## 2724:      10.75 under
## 2725:       2.42 under
# PCA on the odds columns only. The identifier and the label are excluded by
# name rather than by position (previously columns 1 and 71), so the call keeps
# working if the number of selected odds columns changes.
feature_cols <- setdiff(names(odds7), c("matchId", "ou"))
# scale. = TRUE standardises every column to unit variance before the PCA.
pcaodds <- prcomp(odds7[, feature_cols, with = FALSE], scale. = TRUE)
summary(pcaodds)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6
## Standard deviation     6.4227 4.4158 1.23886 1.07396 1.02223 0.98482
## Proportion of Variance 0.5978 0.2826 0.02224 0.01672 0.01514 0.01406
## Cumulative Proportion  0.5978 0.8804 0.90268 0.91940 0.93454 0.94860
##                            PC7     PC8     PC9   PC10    PC11   PC12
## Standard deviation     0.94739 0.93171 0.68884 0.5633 0.35911 0.2880
## Proportion of Variance 0.01301 0.01258 0.00688 0.0046 0.00187 0.0012
## Cumulative Proportion  0.96161 0.97419 0.98106 0.9857 0.98753 0.9887
##                           PC13    PC14    PC15    PC16    PC17    PC18
## Standard deviation     0.26816 0.22743 0.21007 0.19465 0.18837 0.17459
## Proportion of Variance 0.00104 0.00075 0.00064 0.00055 0.00051 0.00044
## Cumulative Proportion  0.98978 0.99053 0.99116 0.99171 0.99223 0.99267
##                           PC19    PC20    PC21    PC22   PC23    PC24
## Standard deviation     0.17059 0.16249 0.15020 0.14589 0.1434 0.13887
## Proportion of Variance 0.00042 0.00038 0.00033 0.00031 0.0003 0.00028
## Cumulative Proportion  0.99309 0.99347 0.99380 0.99411 0.9944 0.99469
##                           PC25    PC26    PC27    PC28    PC29    PC30
## Standard deviation     0.13504 0.13341 0.13086 0.12806 0.12454 0.12318
## Proportion of Variance 0.00026 0.00026 0.00025 0.00024 0.00022 0.00022
## Cumulative Proportion  0.99495 0.99521 0.99546 0.99569 0.99592 0.99614
##                           PC31    PC32    PC33    PC34    PC35    PC36
## Standard deviation     0.11991 0.11481 0.11294 0.11161 0.10896 0.10823
## Proportion of Variance 0.00021 0.00019 0.00018 0.00018 0.00017 0.00017
## Cumulative Proportion  0.99635 0.99654 0.99672 0.99690 0.99708 0.99725
##                           PC37    PC38    PC39    PC40    PC41    PC42
## Standard deviation     0.10551 0.10325 0.09894 0.09757 0.09660 0.09401
## Proportion of Variance 0.00016 0.00015 0.00014 0.00014 0.00014 0.00013
## Cumulative Proportion  0.99741 0.99756 0.99770 0.99784 0.99798 0.99811
##                           PC43    PC44    PC45    PC46    PC47    PC48
## Standard deviation     0.09026 0.08945 0.08684 0.08604 0.08577 0.08535
## Proportion of Variance 0.00012 0.00012 0.00011 0.00011 0.00011 0.00011
## Cumulative Proportion  0.99822 0.99834 0.99845 0.99856 0.99866 0.99877
##                          PC49    PC50    PC51    PC52    PC53    PC54
## Standard deviation     0.0822 0.07979 0.07707 0.07655 0.07455 0.07286
## Proportion of Variance 0.0001 0.00009 0.00009 0.00008 0.00008 0.00008
## Cumulative Proportion  0.9989 0.99896 0.99904 0.99913 0.99921 0.99929
##                           PC55    PC56    PC57    PC58    PC59    PC60
## Standard deviation     0.07205 0.06786 0.06609 0.06597 0.06483 0.06080
## Proportion of Variance 0.00008 0.00007 0.00006 0.00006 0.00006 0.00005
## Cumulative Proportion  0.99936 0.99943 0.99949 0.99956 0.99962 0.99967
##                           PC61    PC62    PC63    PC64    PC65    PC66
## Standard deviation     0.05841 0.05648 0.05417 0.05262 0.05151 0.04895
## Proportion of Variance 0.00005 0.00005 0.00004 0.00004 0.00004 0.00003
## Cumulative Proportion  0.99972 0.99977 0.99981 0.99985 0.99989 0.99992
##                           PC67    PC68    PC69
## Standard deviation     0.04826 0.04127 0.03752
## Proportion of Variance 0.00003 0.00002 0.00002
## Cumulative Proportion  0.99995 0.99998 1.00000

PC1 alone explains about 60% of the total variance, and the first three components together explain about 90%.

PCA2D

# Plot the PCA result, colouring each match by its over/under label.
autoplot(
  pcaodds,
  data = odds7,
  colour = "ou"
)

The 2D PCA plot is shown above; the colours indicate whether the game ended over or under. In the 2D plot, the over and under games cannot be separated using the bookmakers’ odds data. Let’s check the 3D plot.

PCA3D

# Principal-component scores returned by prcomp(), converted to a data.table.
pcadt <- as.data.table(pcaodds[["x"]])
pcadt
##               PC1        PC2        PC3        PC4         PC5         PC6
##    1:   0.1094996 -2.8165224  1.4104979 -0.9471604  0.03794329  0.67351289
##    2:  -3.0756491 -2.7842941 -2.7050423  1.9059103 -0.01578599 -0.06709497
##    3:   1.3709472 -3.1377088 -0.9999828  2.5622927  0.27547972  0.40470371
##    4: -23.4735068  6.2043268  1.2517322 -0.8189955 -0.28934379 -0.86054643
##    5: -23.4735068  6.2043268  1.2517322 -0.8189955 -0.28934379 -0.86054643
##   ---                                                                     
## 2721:  -1.8650193 -2.0208963  0.6246313 -0.7078497  0.51104710  0.23256529
## 2722:   0.2844541 -2.7909304  1.1473108  0.3462261  0.91200215  0.25469514
## 2723:   1.9530650 -3.0205871 -0.6600313 -0.2168889  0.48359437 -0.18563978
## 2724: -14.0294502  0.8648433 -6.9213802  1.5674308  0.66730178 -0.42089214
## 2725:   1.5368982 -2.3202307 -0.3591720 -0.3352899 -0.39602160 -0.21985557
##              PC7        PC8         PC9       PC10        PC11
##    1:  0.4050462 -0.1640882 -0.47008849  0.1591410  0.08352374
##    2: -1.5277973  0.6692502 -2.23272954  0.2091531 -0.20469269
##    3:  0.9780660 -0.5705533  0.25449716  0.2998972  0.05624781
##    4: -0.6843605 -0.9763892 -0.88193349  1.5925573  1.24394682
##    5: -0.6843605 -0.9763892 -0.88193349  1.5925573  1.24394682
##   ---                                                         
## 2721: -0.3548408  1.2288559  0.39704400 -0.9225916 -0.05511780
## 2722:  0.1244794  0.8342132 -0.01428751 -0.1508101 -0.03194472
## 2723:  0.1107918 -0.3243119  0.39394545  0.2595864  0.03477708
## 2724: -1.7992546  0.4402260 -6.85139209  0.7000027 -0.34053743
## 2725:  0.2184204 -1.1963544  0.38383578 -0.5797396  0.05441387
##               PC12        PC13        PC14         PC15         PC16
##    1: -0.105944230 -0.07878307  0.03262987  0.055638097 -0.089186869
##    2: -0.371674642 -0.16428381 -0.03290172  0.006974182 -0.171403999
##    3:  0.123919495 -0.13023886 -0.00640845 -0.130194988 -0.078002677
##    4: -1.582977205  0.03691612 -0.29679335 -1.326196447 -1.201234950
##    5: -1.582977205  0.03691612 -0.29679335 -1.326196447 -1.201234950
##   ---                                                               
## 2721: -0.105980828  0.07227395  0.17979632  0.127133984  0.092402608
## 2722: -0.076581278 -0.06158830  0.05854913  0.057466006  0.002226481
## 2723: -0.009962589 -0.05283796  0.08976455  0.047732949 -0.035387164
## 2724: -0.364378531 -0.53831587 -0.26774331  0.169065260 -0.189259445
## 2725: -0.010281369 -0.05356648  0.14196746  0.077412781  0.027627301
##              PC17        PC18         PC19        PC20        PC21
##    1: -0.02228185 -0.08106838 -0.016813083 -0.11286747 -0.04048568
##    2:  0.03139231 -0.05078094  0.100955142  0.05054150  0.06150107
##    3:  0.02108452  0.06234882  0.046172801  0.00367468  0.06586037
##    4:  0.66555157 -0.83887361 -0.325946002 -0.36937499 -0.04130945
##    5:  0.66555157 -0.83887361 -0.325946002 -0.36937499 -0.04130945
##   ---                                                             
## 2721: -0.17494004 -0.05725638  0.051424066 -0.27185719 -0.02227207
## 2722: -0.05059468  0.03584093  0.010040668  0.02115357 -0.04040637
## 2723:  0.01916138 -0.04733382 -0.057066165 -0.02229914  0.02270040
## 2724: -0.04570588 -0.51110377  0.085752886 -0.17574701  0.01131185
## 2725: -0.12437726 -0.01150243  0.006787161  0.01602313  0.03068628
##               PC22          PC23         PC24         PC25          PC26
##    1: -0.017622918 -5.495537e-03  0.062707173 -0.006027917  0.0398904900
##    2: -0.033167493 -1.174176e-01  0.003727930  0.125360574 -0.0284400415
##    3:  0.001346656  7.461795e-04 -0.008629437  0.045853243 -0.0217761261
##    4:  0.221934753  3.422885e-01 -0.257023947 -1.136345344  0.1351373514
##    5:  0.221934753  3.422885e-01 -0.257023947 -1.136345344  0.1351373514
##   ---                                                                   
## 2721:  0.016699255  1.201862e-01 -0.029884486  0.022645722  0.0999479488
## 2722: -0.030207213  2.976507e-02  0.074096500 -0.116210905  0.0186459121
## 2723: -0.030834127  8.449004e-05 -0.010237785  0.043453028  0.0001355646
## 2724: -0.263305986  5.732183e-02  0.550386611 -0.019115079 -0.4583536189
## 2725: -0.070903552  4.080252e-02 -0.042191764 -0.123379819 -0.0310108344
##               PC27          PC28        PC29         PC30        PC31
##    1: -0.062396837 -0.0023505638  0.01946260  0.114915343 -0.02439480
##    2: -0.083268438 -0.0320332030 -0.08106263 -0.076181714 -0.05498551
##    3:  0.054053562 -0.0002475425 -0.01594402  0.041036132 -0.03903440
##    4:  0.105569609 -0.7821914442 -0.23204365 -0.034693391  0.16884332
##    5:  0.105569609 -0.7821914442 -0.23204365 -0.034693391  0.16884332
##   ---                                                                
## 2721: -0.132889329  0.0673962396  0.03613995  0.084690320 -0.06023499
## 2722:  0.028532078 -0.0080006360  0.02008222  0.061557493  0.01742559
## 2723: -0.034495103 -0.0136679246  0.07798585 -0.009192346 -0.03049386
## 2724: -0.450581883 -0.4275874489 -0.08344979 -0.300326244  0.14073870
## 2725:  0.007768031  0.0251134220  0.02864915  0.030301571  0.02777475
##              PC32        PC33         PC34        PC35         PC36
##    1: -0.03274894  0.01832449  0.044599906  0.05625284  0.004519915
##    2:  0.14090464  0.14081884 -0.029565231 -0.09175212 -0.127747971
##    3:  0.05522705 -0.01238936  0.011275701 -0.01048005  0.037549144
##    4: -0.02056760  0.20025409  0.567384143 -0.07717095  0.446986229
##    5: -0.02056760  0.20025409  0.567384143 -0.07717095  0.446986229
##   ---                                                              
## 2721:  0.02716941 -0.02396095 -0.124799007  0.02205239 -0.033668144
## 2722: -0.01740148  0.02932837 -0.001514987  0.01180951  0.037842167
## 2723: -0.03697881  0.02801642 -0.004845223  0.01235249 -0.004131060
## 2724:  0.14025518 -0.18075513  0.430064000 -0.02184590 -0.292874705
## 2725: -0.05362789 -0.10407587  0.019829875 -0.04062907 -0.033022883
##               PC37          PC38         PC39         PC40        PC41
##    1: -0.032861441 -0.0633794242 -0.046149597  0.004373496 -0.01084611
##    2:  0.093311575  0.0586987109  0.060585055  0.040741226  0.00487624
##    3:  0.009803268  0.0415914842 -0.003756978  0.028446140 -0.05107400
##    4:  0.010713647 -0.0903579812  0.135497480  0.279439491  0.09845857
##    5:  0.010713647 -0.0903579812  0.135497480  0.279439491  0.09845857
##   ---                                                                 
## 2721: -0.001887168  0.0839098601 -0.049040368 -0.031217232 -0.02737934
## 2722:  0.009032791 -0.0252548455 -0.001330507  0.004645809 -0.02557730
## 2723: -0.027596199  0.0007757012 -0.067885502 -0.007742378 -0.05401345
## 2724:  0.044397564 -0.1994510730 -0.057322543 -0.200525509  0.05440102
## 2725:  0.016119360  0.0224151988 -0.045472052 -0.031434933 -0.04194091
##               PC42        PC43        PC44          PC45        PC46
##    1:  0.019419812  0.02701403  0.04854765  0.0097897080 -0.02183368
##    2: -0.144963900  0.02761574  0.09863950  0.0345945977 -0.02805210
##    3:  0.008640447  0.07040731 -0.05450916  0.0002682971 -0.02230707
##    4:  0.176000461 -0.28563034 -0.15293696 -0.0320327421  0.09555602
##    5:  0.176000461 -0.28563034 -0.15293696 -0.0320327421  0.09555602
##   ---                                                               
## 2721: -0.037711764  0.05366819 -0.02319373 -0.0163214126 -0.01254730
## 2722: -0.028639285  0.02857550 -0.02285538  0.0995242363  0.04191148
## 2723:  0.025982034  0.01832109  0.02192807  0.0333122803  0.01119037
## 2724:  0.240227804  0.42834916 -0.18371303  0.1811024281  0.16981106
## 2725: -0.017087349  0.02298037  0.02310519  0.0497932510  0.01753672
##               PC47        PC48         PC49         PC50        PC51
##    1:  0.043875867  0.04769054 -0.010578056  0.016648463 -0.01748164
##    2: -0.031704868  0.12826316  0.020374216 -0.027582287  0.03078230
##    3:  0.005558739 -0.03561931 -0.006056237 -0.008866502 -0.02548964
##    4: -0.259737971  0.20362851  0.036605283  0.031639152  0.02676519
##    5: -0.259737971  0.20362851  0.036605283  0.031639152  0.02676519
##   ---                                                               
## 2721:  0.046264158  0.00433486 -0.089630469  0.060309779  0.01433026
## 2722: -0.022235597 -0.02101647  0.027663604  0.026437528 -0.01142296
## 2723: -0.061279485 -0.02843915  0.040460833  0.013152514 -0.05636214
## 2724: -0.284481254 -0.19081632  0.726526213  0.283241624  0.16305475
## 2725: -0.016681815 -0.07872398 -0.002173336 -0.066971006 -0.02676406
##               PC52         PC53         PC54        PC55        PC56
##    1:  0.007898021  0.003798567 -0.018835089 -0.04038545 -0.02417409
##    2:  0.053133637 -0.058248975  0.031563814 -0.01585609 -0.01524540
##    3:  0.032414163 -0.012974830 -0.038542471  0.02812098 -0.02276349
##    4: -0.036563346 -0.005938669 -0.034866692 -0.19871451 -0.15963418
##    5: -0.036563346 -0.005938669 -0.034866692 -0.19871451 -0.15963418
##   ---                                                               
## 2721:  0.015565485  0.032823474 -0.001089686  0.01354880 -0.03492478
## 2722:  0.034072549  0.004888457  0.002736128 -0.01244107 -0.01702541
## 2723: -0.018657471  0.046753738 -0.083608151 -0.04744320  0.00388358
## 2724:  0.079605022  0.037671421 -0.076590224 -0.05069652 -0.02549552
## 2725: -0.021597273  0.012494105  0.026835068  0.05230312 -0.01057994
##                PC57         PC58        PC59          PC60         PC61
##    1:  0.0025523519 -0.002271119  0.02101917 -0.0095324127  0.016084434
##    2:  0.0116598476  0.045321923  0.08317484 -0.0567077761 -0.040147849
##    3:  0.0175285306  0.022987256  0.01099193  0.0041480220 -0.019288320
##    4: -0.0198279019  0.124407525  0.10483475 -0.1083259612  0.033070146
##    5: -0.0198279019  0.124407525  0.10483475 -0.1083259612  0.033070146
##   ---                                                                  
## 2721: -0.0002601175 -0.041651550  0.02111144 -0.0007344976  0.006003927
## 2722: -0.0761466714 -0.043408222  0.03805816  0.0141113724  0.015765373
## 2723: -0.0509622818 -0.073337214 -0.02983894  0.0656498354 -0.007682648
## 2724: -0.2673811735 -0.185746997 -0.12715742  0.1214070184  0.004342155
## 2725: -0.0071283862  0.017229940  0.00749681 -0.0187720615  0.007692525
##              PC62         PC63         PC64         PC65         PC66
##    1: -0.01027039  0.003020514  0.013088730 -0.005440970  0.005979006
##    2: -0.07320618  0.075317386 -0.081157709 -0.045682721 -0.120745954
##    3:  0.01633244 -0.003464083 -0.004143754 -0.008208025 -0.016563021
##    4:  0.03688770 -0.014569985  0.119909674  0.002295273  0.055647470
##    5:  0.03688770 -0.014569985  0.119909674  0.002295273  0.055647470
##   ---                                                                
## 2721: -0.02140903 -0.022364872 -0.029971134  0.013904637  0.030952466
## 2722:  0.04303969 -0.007619967  0.043082323 -0.045104350  0.015473048
## 2723:  0.02087650 -0.021562033  0.063066865 -0.036176166  0.001664523
## 2724:  0.03704126  0.069442237  0.020716406 -0.010762858  0.117163527
## 2725: -0.01255886  0.008039082  0.003563335 -0.006034935  0.003615364
##               PC67          PC68         PC69
##    1: -0.000532480 -6.551427e-03  0.008969380
##    2: -0.004875613 -2.769420e-02  0.007347256
##    3: -0.017157192  4.965918e-05  0.006353475
##    4: -0.036089225  1.302745e-02 -0.008037706
##    5: -0.036089225  1.302745e-02 -0.008037706
##   ---                                        
## 2721: -0.007693429  6.857856e-03 -0.005375186
## 2722: -0.016603208 -3.786412e-03 -0.025533248
## 2723: -0.054256696 -2.368965e-02  0.023516895
## 2724:  0.002280593 -1.728587e-02 -0.033818251
## 2725: -0.012812491 -2.046184e-03  0.002322718
# 3D scatter of the first three principal components, coloured by over/under.
pc_axes <- list(
  xaxis = list(title = "PC1"),
  yaxis = list(title = "PC2"),
  zaxis = list(title = "PC3")
)
p <- plot_ly(
  pcadt,
  x = ~PC1, y = ~PC2, z = ~PC3,
  color = ~odds7$ou,
  colors = c("#BF382A", "#0C4B8E")
)
p <- add_markers(p)
# For 3D traces the axis settings are given under `scene`.
p <- layout(p, scene = pc_axes)
p

In the 3D plot, the over and under games again cannot be separated using the bookmakers’ odds data.

Manhattan

# Distance matrix: pairwise Manhattan (L1) distances between the rows of pcadt
manhattan <- dist(pcadt, method = "manhattan")
# cmdscale() rejects NA distances, so any undefined distance is set to 0
manhattan[is.na(manhattan)] <- 0

# Scaling: classical multidimensional scaling to two coordinates
# (k = 2 is the cmdscale() default, spelled out here for clarity)
manhattan <- cmdscale(manhattan, k = 2)
manhattandt <- as.data.table(manhattan)
colnames(manhattandt) <- c("C1", "C2")

# 2D scatter of the MDS coordinates, coloured by the over/under label.
# `scene` only configures 3D plots, so the titles of a 2D scatter must be set
# on the top-level xaxis/yaxis attributes. The axes are MDS coordinates
# (C1, C2), not principal components, and are labelled accordingly.
m <- plot_ly(manhattandt, x = ~C1, y = ~C2, color = ~odds7$ou,
             colors = c('#BF382A', '#0C4B8E')) %>%
  add_markers() %>%
  layout(xaxis = list(title = 'C1'),
         yaxis = list(title = 'C2'))
m

Euclidean

# Distance matrix: pairwise Euclidean distances between the rows of pcadt
# ("euclidean" is the dist() default, spelled out here for clarity)
eu <- dist(pcadt, method = "euclidean")
# cmdscale() rejects NA distances, so any undefined distance is set to 0
eu[is.na(eu)] <- 0

# Scaling: classical multidimensional scaling to two coordinates
# (k = 2 is the cmdscale() default)
eu <- cmdscale(eu, k = 2)
eudt <- as.data.table(eu)
colnames(eudt) <- c("C1", "C2")

# 2D scatter of the MDS coordinates, coloured by the over/under label.
# `scene` only configures 3D plots, so the titles of a 2D scatter must be set
# on the top-level xaxis/yaxis attributes. The axes are MDS coordinates
# (C1, C2), not principal components, and are labelled accordingly.
e <- plot_ly(eudt, x = ~C1, y = ~C2, color = ~odds7$ou,
             colors = c('#BF382A', '#0C4B8E')) %>%
  add_markers() %>%
  layout(xaxis = list(title = 'C1'),
         yaxis = list(title = 'C2'))
e

The results of PCA and MDS are similar; we cannot identify a meaningful difference between any of the graphs.

Task2

# Load hard dependencies with library() so a missing package stops the script
# here; require() would only return FALSE and let it fail later.
library(data.table)
library(TunePareto)
library(glmnet)
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-16

# Matches dated in [trainStart, testStart) are used for training and matches
# dated on or after testStart are used for testing.
testStart <- as.Date('2018-11-16')
trainStart <- as.Date('2012-07-15')
# Bookmaker odds whose missing ratio is greater than this threshold are removed
rem_miss_threshold <- 0.01

# Project helper scripts; presumably these define the preprocessing,
# feature-extraction, metric and training functions called further down.
source('C:\\Users\\a-adsene\\Desktop\\ETM58D\\data_preprocessing.r')
source('C:\\Users\\a-adsene\\Desktop\\ETM58D\\feature_extraction.r')
source('C:\\Users\\a-adsene\\Desktop\\ETM58D\\performance_metrics.r')
source('C:\\Users\\a-adsene\\Desktop\\ETM58D\\train_models.r')


# read data
matches_raw <- readRDS("C:\\Users\\a-adsene\\Desktop\\ETM58D\\df9b1196-e3cf-4cc7-9159-f236fe738215_matches.rds")
odd_details_raw <- readRDS('C:\\Users\\a-adsene\\Desktop\\ETM58D\\df9b1196-e3cf-4cc7-9159-f236fe738215_odd_details.rds')

# Clean the raw match table with the project preprocessing helper.
matches <- matches_data_preprocessing(matches_raw)
## Warning in strptime(Match_DateTime, "%Y-%m-%d %H:%M:%OS"): POSIXlt column
## type detected and converted to POSIXct. We do not recommend use of POSIXlt
## at all because it uses 40 bytes to store one date.
## Following postponed matches are KEPT during data_preprocessing:
##                                leagueId  matchId    home     away  score
## 1: df9b1196-e3cf-4cc7-9159-f236fe738215 EX7OmEj1 chelsea brighton POSTP.
##          date   type
## 1: 1551009600 soccer
# Clean the raw odds history; the helper also receives the processed matches.
odd_details <- details_data_preprocessing(odd_details_raw, matches)

# Build opening/closing odd features across bookmakers, dropping bookmakers
# whose missing ratio exceeds rem_miss_threshold.
features <- extract_features.openclose(
  matches,
  odd_details,
  pMissThreshold = rem_miss_threshold,
  trainStart,
  testStart
)
## Number of bookmakers with proportion of missings below 0.01 since 2012-07-15 : 12 
## Number of bookmakers with no missings since testStart 2018-11-16 : 11
## Warning in `[.data.table`(matches, , -c("Home", "Away", "Home_Score",
## "Away_Score", : column(s) not removed because not found:
## Result_Home,Result_Tie,Result_Away
# Split by match date: [trainStart, testStart) is the training window and
# everything from testStart onward is the test window.
train_features <- features[Match_Date >= trainStart & Match_Date < testStart]
test_features <- features[Match_Date >= testStart]

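We set alpha to 0. Note that in glmnet this selects the ridge (L2) penalty instead of the lasso (L1) penalty; it does not eliminate the penalty, whose strength is still controlled by lambda.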

# Run glmnet on the training data, tuning lambda by cross-validated RPS
# (10 folds x 2 replications), and return predictions for the lambda with the
# minimum RPS. The first five columns are excluded from the feature set.
# alpha = 0 is presumably forwarded to glmnet, where it selects the ridge
# penalty -- TODO confirm in train_models.r.
predictions <- train_glmnet(
  train_features,
  test_features,
  not_included_feature_indices = 1:5,
  alpha = 0,
  nlambda = 50,
  tune_lambda = TRUE,
  nofReplications = 2,
  nFolds = 10,
  trace = TRUE
)
## Iteration 1: Fold 1 of Replication 1
## Iteration 2: Fold 2 of Replication 1
## Iteration 3: Fold 3 of Replication 1
## Iteration 4: Fold 4 of Replication 1
## Iteration 5: Fold 5 of Replication 1
## Iteration 6: Fold 6 of Replication 1
## Iteration 7: Fold 7 of Replication 1
## Iteration 8: Fold 8 of Replication 1
## Iteration 9: Fold 9 of Replication 1
## Iteration 10: Fold 10 of Replication 1
## Iteration 11: Fold 1 of Replication 2
## Iteration 12: Fold 2 of Replication 2
## Iteration 13: Fold 3 of Replication 2
## Iteration 14: Fold 4 of Replication 2
## Iteration 15: Fold 5 of Replication 2
## Iteration 16: Fold 6 of Replication 2
## Iteration 17: Fold 7 of Replication 2
## Iteration 18: Fold 8 of Replication 2
## Iteration 19: Fold 9 of Replication 2
## Iteration 20: Fold 10 of Replication 2

Here are the results:

# Print the list returned by train_glmnet(): `predictions` holds one row per
# match (matchId, Match_Result and the Over/Under probabilities) and `cv_stats`
# holds lambda.min, lambda.1se and the corresponding mean RPS values.
predictions
## $predictions
##       matchId Match_Result       Over      Under
##   1: 0AFLSIpA        Under 0.04302878 0.95697122
##   2: 0Aeb7pV0         Over 0.96550069 0.03449931
##   3: 0QqMMPUm         Over 0.96402193 0.03597807
##   4: 0Uu6adXM        Under 0.03868408 0.96131592
##   5: 0pC7YMTa         Over 0.96078852 0.03921148
##  ---                                            
## 156: zJAnjne2         Over 0.96498184 0.03501816
## 157: zNtirbpc         Over 0.96139519 0.03860481
## 158: zROebAze         Over 0.96161380 0.03838620
## 159: zar1vkdj         Over 0.96321508 0.03678492
## 160: zeSaajYF         Over 0.96250757 0.03749243
## 
## $cv_stats
## $cv_stats$lambda.min
## [1] 0.0499506
## 
## $cv_stats$lambda.1se
## [1] 0.0499506
## 
## $cv_stats$meanRPS_min
## [1] 0.001646606
## 
## $cv_stats$meanRPS_1se
## [1] 0.001646606